clear all
*cap log close
set more off
set seed 603


* set parameters ------------------------------
local Y  = "cite"
* graph positioning ---------
local yhi =  0.015
local ylo = -0.015
local yst =  0.005
local ytx =  0.014 
local xtx =  0.450
local lpo =  7
* ---------------------------------------------


// panel a: trimming selected samples -----------------------------------
use "${data}/out/4-main", clear
// take predicted recidvism net of beat-shifts ---------
reghdfe hatho_`Y', absorb(totfe) nocons resid
predict rhat, resid
gen N = 1
collapse (sum) N (mean) rhat, by(officerid)
keep if N >= 50


// across officer percentiles -----------------------------
// easier to loop: percentiles in each direction ----------
sort rhat
gen rank = _n
summ rank, d
gen pctile1 = (rank/r(max))*100
drop rank 

gsort -rhat
gen rank = _n
summ rank, d
gen pctile2 = (rank/r(max))*100
drop rank

keep officerid pctile1 pctile2
tempfile temp
save    `temp'


// trimmed estimates -----------------------------
use "${data}/out/4-main", clear
merge m:1 officerid using `temp', keep(1 3) nogen 
replace pctile1 = 1 if mi(pctile1)
replace pctile2 = 1 if mi(pctile2)

gen x  = .
gen b  = .
gen ub = . 
gen lb = .

local k = 1
foreach p in 0 1 5 10 15 20 25 {
	replace x = `p' if _n == `k'
	ivreghdfe `Y'_ny1 (harsh = Z) if pctile1>`p' & pctile2>`p', absorb(totfe) vce(cluster officerid)
	replace b  = _b[harsh] if _n == `k'
	replace ub = _b[harsh] + 1.96*_se[harsh] if _n == `k'
	replace lb = _b[harsh] - 1.96*_se[harsh] if _n == `k'
	local ++k
}

// construct plot -----------------------------
#delimit ;
twoway rspike ub lb x, lcolor(emidblue) ||
scatter b x, msymbol(O) mcolor(navy) 
graphregion(color(white)) plotregion(lcolor(black) lwidth(medthin))
ylab(-0.025(0.005)0.01,nogrid) xlab(,nogrid)
yline(0,lpattern(dash) lcolor(cranberry))
ytitle("")
xtitle("Percentile of Officers Trimmed") 
legend(ring(0) pos(11) order(2 1) cols(1) region(lstyle(border))
lab(2 "IV Estimate") lab(1 "95% CI")) ; 
#delimit cr
graph export "${out}/apx_deter/selection_trim.pdf", replace 
// ---------------------------------------------------------------------



// panel b: heckman correction -----------------------------------------
use "${data}/out/4-main", clear
gen Y = `Y'_ny1

egen offdaytag = tag(officerid countynum offensedate)
bysort officerid countynum year: egen days = total(offdaytag)
bysort officerid countynum year: gen tickets = _N

gen dailytickets = tickets/days
bysort countynum year: egen p95 = pctile(dailytickets), p(95)
gen rho = dailytickets/p95
replace rho = 1 if rho>1
gen inverse = invnormal(rho)
gen mills = normalden(inverse)/rho
replace mills = 0 if mills==.
label variable mills "Selection Correction"

** keep only obs where officer working in most common county **
bysort officerid year: egen most_n = max(tickets)
gen sample = tickets==most_n
label variable sample "Observations in county where officer wrote most tickets in year"
keep if sample == 1

// store coefficient adjusting for mills --------------
reghdfe Y Z mills, absorb(totfe) vce(cluster officerid)
local B = _b[Z]
ivreghdfe Y mills (harsh = Z), absorb(totfe) vce(cluster officerid)
local b = "{it:{&beta}{subscript:IV}} = `:di %5.4f _b[harsh]' (`:di %5.4f _se[harsh]')"

// Control mean? -------------------------------
*summ Y if lenient == 1
*local m = "{&mu} = `:di %4.3f r(mean)'"

// setup for residual plot -------------
reghdfe Y mills, absorb(totfe) nocons resid
predict rY, resid
reghdfe Z mills, absorb(totfe) nocons resid
predict rZ, resid

// manual binscatter --------------------
binscatter rY rZ, nq(20) nodraw gen(BIN)
gen xBIN = _n if _n <= 20
gen mZ = .
gen mY = .

forval i = 1/20 {
	summ rZ if BIN == `i'
	replace mZ = r(mean) if xBIN == `i'
	summ rY if BIN == `i'
	replace mY = r(mean) if xBIN == `i'
}


// nonparametric fit ---------------------
_pctile rZ, n(100)
scalar loZ = r(r2)
scalar hiZ = r(r98)

*local linear regression with degree=2, bw=0.15, kernel=triangular
lpoly rY rZ if rZ>loZ & rZ<hiZ, nogr ci gen(x y) se(yse) degree(0) bw(0.2) kernel(tri)
cap g ylb = y - 1.96*yse
cap g yub = y + 1.96*yse


// build graph ----------------------------
#delimit ;
twoway rarea yub ylb x, fintensity(15) fcolor(emidblue) lwidth(none) ||
line y x, lwidth(medthick) lcolor(forest_green) ||
scatter mY mZ, msize(medlarge) msymbol(Oh) mcolor(dknavy) ||
lfit mY mZ, lwidth(medthick) lcolor(forest_green) lpattern(dash) 
graphregion(color(white)) plotregion(lcolor(black) lwidth(medthin))
ylab(`ylo'(`yst')`yhi',nogrid) xlab(-0.5(0.25)0.5,nogrid)
xtitle("Officer Stringency")
legend(ring(0) pos(`lpo') order(3 4 2 1) cols(2) region(lstyle(border))
lab(3 "Local Mean") lab(4 "Linear Fit") lab(2 "Nonparametric Fit") lab(1 "95% CI"))
text(`ytx' `xtx' "`b'", place(sw) size(medlarge)) ;
#delimit cr
graph export "${out}/apx_deter/selection_heckman.pdf", replace  
// ---------------------------------------------------------------------



// panel c: GPS fixed effects -----------------------------------------
use "${data}/out/4-main", clear
gen Y = `Y'_ny1
keep if !mi(gpsfe)
local FE = "gpsfe"

// store coefficient using GPS FE --------------
reghdfe Y Z, absorb(gpsfe) vce(cluster officerid)
local B = _b[Z]
ivreghdfe Y (harsh = Z), absorb(gpsfe) vce(cluster officerid)
local b = "{it:{&beta}{subscript:IV}} = `:di %5.4f _b[harsh]' (`:di %5.4f _se[harsh]')"

// setup for residual plot -------------
reghdfe Y, absorb(gpsfe) nocons resid
predict rY, resid
reghdfe Z, absorb(gpsfe) nocons resid
predict rZ, resid


// manual binscatter --------------------
binscatter rY rZ, nq(20) nodraw gen(BIN)
gen xBIN = _n if _n <= 20
gen mZ = .
gen mY = .
forval i = 1/20 {
	summ rZ if BIN == `i'
	replace mZ = r(mean) if xBIN == `i'
	summ rY if BIN == `i'
	replace mY = r(mean) if xBIN == `i'
}


// Nonparametric fit ---------------------
_pctile rZ, n(100)
scalar loZ = r(r2)
scalar hiZ = r(r98)

*local linear regression with degree=2, bw=0.15, kernel=triangular
lpoly rY rZ if rZ>loZ & rZ<hiZ, nogr ci gen(x y) se(yse) degree(0) bw(0.2) kernel(tri)
cap g ylb = y - 1.96*yse
cap g yub = y + 1.96*yse


// Build graph ----------------------------
#delimit ;
twoway rarea yub ylb x, fintensity(15) fcolor(emidblue) lwidth(none) ||
line y x, lwidth(medthick) lcolor(forest_green) ||
scatter mY mZ, msize(medlarge) msymbol(Oh) mcolor(dknavy) ||
lfit mY mZ, lwidth(medthick) lcolor(forest_green) lpattern(dash) 
graphregion(color(white)) plotregion(lcolor(black) lwidth(medthin))
ylab(`ylo'(`yst')`yhi',nogrid) xlab(-0.5(0.25)0.5,nogrid)
xtitle("Officer Stringency")
legend(ring(0) pos(`lpo') order(3 4 2 1) cols(2) region(lstyle(border))
lab(3 "Local Mean") lab(4 "Linear Fit") lab(2 "Nonparametric Fit") lab(1 "95% CI"))
text(`ytx' `xtx' "`b'", place(sw) size(medlarge)) ;
#delimit cr
graph export "${out}/apx_deter/selection_gps.pdf", replace  
// ---------------------------------------------------------------------


